
import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.{SparkConf, SparkContext}


object KMeanExample {

  /** Entry point: trains a KMeans model on whitespace-separated numeric
   *  vectors read from a text file (one vector per line), then prints the
   *  clustering cost (WSSSE) and each cluster center.
   *
   *  Usage: KMeanExample [inputPath]
   *  Falls back to the original hard-coded sample path when no argument is given.
   */
  def main(args: Array[String]): Unit = {

    // Silence log output for a cleaner demo console. The "org" logger covers
    // Spark's own logging, not just this class.
    Logger.getLogger("org").setLevel(Level.OFF)
    Logger.getLogger(this.getClass).setLevel(Level.OFF)

    val conf = new SparkConf().setAppName("kmeans").setMaster("local[4]")
    val sc = new SparkContext(conf)

    try {
      // Input path: first CLI argument if supplied, otherwise the bundled sample.
      // Alternative sample: "/usr/spark/spark-1.6.0-bin-hadoop2.6/data/mllib/kmeans_data.txt"
      val filePath =
        if (args.nonEmpty) args(0)
        else "E:\\IDEA_社区版_工作环境\\spark\\k-means2\\src\\kmeans_data.txt"

      // Extract feature vectors: one vector per line.
      // "\\s+" (not "\\s") tolerates runs of whitespace, and blank lines are
      // dropped so `.toDouble` never receives an empty token.
      val data = sc.textFile(filePath)
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(line => Vectors.dense(line.split("\\s+").map(_.toDouble)))
        .cache()

      // k = 2 clusters, at most 10 iterations.
      val numClusters = 2
      val numIterations = 10

      // Train the model.
      val clusters = KMeans.train(data, numClusters, numIterations)

      // computeCost returns the within-set sum of squared errors (WSSSE),
      // not the variance — label it accordingly.
      println(s"WSSSE: ${clusters.computeCost(data)}")

      // Print the center of each cluster.
      for (center <- clusters.clusterCenters) {
        println(s"Cluster center: $center")
      }
    } finally {
      // Always release the SparkContext, even if reading or training fails.
      sc.stop()
    }
  }
}
